Thank Kieran for providing this code kindly.

Fit the CLVM model

secretory <- readRDS("../rds/20180910Fresh_secretory_5_7clusters_clincluster.rds")
sce <- readRDS("../rds/20181011_culture_11553n15072_sceset.rds")
m <- model.matrix(~ sce$total_features + sce$Patient2)
# sce<- scater::normalise(sce)
sce<- scater::normaliseExprs(sce, design = m)
## Warning: 'normalizeExprs' is deprecated.
## Use edgeR::calcNormFactors(), normalize(), limma::removeBatchEffect() directly instead.
exprs(sce) <- norm_exprs(sce)

gene_vars <- rowVars(exprs(sce))
var_cutoff <- sort(gene_vars, decreasing = TRUE)[7500]
sce_hvg <- sce[gene_vars >= var_cutoff, ]

is_na_mgi_symbol <- is.na(rowData(sce_hvg)$mgi_symbol)
rowData(sce_hvg)$mgi_symbol <- rowData(sce_hvg)$Geneid

sce_hvg$time <- NA
sce_hvg$time[sce$source == "Fresh"] <- 0
sce_hvg$time[sce$source == "O.N."] <- 0.5
sce_hvg$time[sce$source == "Long" & sce$Patient2 == 11553] <- 2
sce_hvg$time[sce$source == "Long" & sce$Patient2 == 15072] <- 6

sce$time <- sce_hvg$time

And perform inference:

pc1 <- prcomp(t(exprs(sce_hvg)), scale = TRUE)$x[,1]
pc1 <- scale_vec(pc1)
time_numeric <- as.numeric( sce$time)
pc1 <- pc1 * sign(cor(pc1, time_numeric))
x <- sce_hvg$time
# assay(sce, "norm_exprs") <- normalise(sce)

pcavi <- phenopath(sce_hvg, x, z_init = pc1, sce_assay = "norm_exprs")
## Iteration    ELBO    Change (%) 
## [ 1 ]     -12259050.0368339   Inf 
## [ 2 ]     -12171446.8072936   0.719743765283722 
## [ 3 ]     -12170149.6926922   0.010658164723716 
## [ 4 ]     -12169117.1019817   0.00848533794052712 
## [ 5 ]     -12168202.6395145   0.00751518111869619 
## [ 6 ]     -12167389.3429862   0.00668423196911412 
## [ 7 ]     -12166664.2801838   0.00595942146283749 
## [ 8 ]     -12166014.7024713   0.00533928100860801 
## [ 9 ]     -12165428.5520254   0.00481816520733624 
## [ 10 ]    -12164895.1311443   0.00438491968425485 
## [ 11 ]    -12164405.4242848   0.00402573609110709 
## [ 12 ]    -12163952.1085442   0.00372671428343931 
## [ 13 ]    -12163529.3773523   0.00347539911134304 
## [ 14 ]    -12163132.6869771   0.00326141616134619 
## [ 15 ]    -12162758.4967358   0.00307652446980571 
## [ 16 ]    -12162404.0390067   0.00291437225689607 
## [ 17 ]    -12162067.1317197   0.00277014822631597 
## [ 18 ]    -12161746.0330607   0.00264023486561942 
## [ 19 ]    -12161439.3324245   0.00252191067072253 
## [ 20 ]    -12161145.8700931   0.00241311414643824 
## [ 21 ]    -12160864.6785853   0.00231226574140791 
## [ 22 ]    -12160594.939845    0.00221813769470955 
## [ 23 ]    -12160335.9537883   0.00212976070484431 
## [ 24 ]    -12160087.1149079   0.00204635771154652 
## [ 25 ]    -12159847.8945814   0.00196729702986733 
## [ 26 ]    -12159617.8274345   0.00189205902834269 
## [ 27 ]    -12159396.5006183   0.00182021218067817 
## [ 28 ]    -12159183.5452163   0.00175139557045801 
## [ 29 ]    -12158978.6292401   0.00168530583425554 
## [ 30 ]    -12158781.4518416   0.001621687167301 
## [ 31 ]    -12158591.7384759   0.00156032351246122 
## [ 32 ]    -12158409.2368297   0.00150103227040926 
## [ 33 ]    -12158233.7133744   0.00144365916455387 
## [ 34 ]    -12158064.9504408   0.00138807396014201 
## [ 35 ]    -12157902.7437322   0.00133416685424643 
## [ 36 ]    -12157746.9002128   0.0012818454000921 
## [ 37 ]    -12157597.2363143   0.00123103188544425 
## [ 38 ]    -12157453.5764184   0.00118166106971106 
## [ 39 ]    -12157315.7515744   0.00113367824607489 
## [ 40 ]    -12157183.5984215   0.00108703756789537 
## [ 41 ]    -12157056.958284    0.00104170061779055 
## [ 42 ]    -12156935.6764191   0.000997635162032823 
## [ 43 ]    -12156819.6013918   0.000954814096869996 
## [ 44 ]    -12156708.5845637   0.000913214521302936 
## [ 45 ]    -12156602.4796753   0.000872816961197061 
## [ 46 ]    -12156501.142513    0.000833604678941544 
## [ 47 ]    -12156404.4306463   0.000795563089786837 
## [ 48 ]    -12156312.2032267   0.000758679261585724 
## [ 49 ]    -12156224.3208424   0.00072294144954577 
## [ 50 ]    -12156140.6454164   0.000688338744545721 
## [ 51 ]    -12156061.0401489   0.000654860709333412 
## [ 52 ]    -12155985.3694921   0.000622497103050494 
## [ 53 ]    -12155913.4991578   0.000591237625796828 
## [ 54 ]    -12155845.2961506   0.000561071693979495 
## [ 55 ]    -12155780.6288252   0.000531988256134128 
## [ 56 ]    -12155719.3669619   0.000503975630695985 
## [ 57 ]    -12155661.3818595   0.000477021369630234 
## [ 58 ]    -12155606.546441    0.000451112153761454 
## [ 59 ]    -12155554.73537     0.000426233702044099 
## [ 60 ]    -12155505.8251747   0.000402370712136175 
## [ 61 ]    -12155459.6943769   0.000379506814210675 
## [ 62 ]    -12155416.223624    0.00035762455222534 
## [ 63 ]    -12155375.2958214   0.000336705380376969 
## [ 64 ]    -12155336.7962628   0.000316729674050692 
## [ 65 ]    -12155300.6127566   0.000297676769637783 
## [ 66 ]    -12155266.6357467   0.000279525006543378 
## [ 67 ]    -12155234.7584271   0.000262251781114199 
## [ 68 ]    -12155204.8768453   0.000245833633234485 
## [ 69 ]    -12155176.8899984   0.000230246315220467 
## [ 70 ]    -12155150.6999165   0.000215464888064893 
## [ 71 ]    -12155126.2117348   0.000201463821524594 
## [ 72 ]    -12155103.3337529   0.000188217090327371 
## [ 73 ]    -12155081.9774823   0.000175698285269684 
## [ 74 ]    -12155062.0576803   0.000163880710288516 
## [ 75 ]    -12155043.4923701   0.000152737505089063 
## [ 76 ]    -12155026.2028501   0.000142241733947475 
## [ 77 ]    -12155010.1136879   0.000132366505570438 
## [ 78 ]    -12154995.1527062   0.000123085049260748 
## [ 79 ]    -12154981.2509529   0.000114370832406825 
## [ 80 ]    -12154968.3426643   0.000106197632684426 
## [ 81 ]    -12154956.3652154   9.85396293624684e-05 
## [ 82 ]    -12154945.259063    9.13714714951217e-05 
## [ 83 ]    -12154934.9676805   8.46683467986703e-05 
## [ 84 ]    -12154925.4374839   7.84060471984323e-05 
## [ 85 ]    -12154916.6177526   7.25610187252703e-05 
## [ 86 ]    -12154908.460545    6.71103993937042e-05 
## [ 87 ]    -12154900.9206081   6.20320725382023e-05 
## [ 88 ]    -12154893.9552847   5.73046824635015e-05 
## [ 89 ]    -12154887.5244173   5.29076666722531e-05 
## [ 90 ]    -12154881.5902498   4.88212691333772e-05 
## [ 91 ]    -12154876.1173279   4.50265548959752e-05 
## [ 92 ]    -12154871.0723984   4.15054136670513e-05 
## [ 93 ]    -12154866.4243092   3.82405616244464e-05 
## [ 94 ]    -12154862.14391     3.52155306615942e-05 
## [ 95 ]    -12154858.2039528   3.24146697363987e-05 
## [ 96 ]    -12154854.5789952   2.98231263273443e-05 
## [ 97 ]    -12154851.2453048   2.7426830094624e-05 
## [ 98 ]    -12154848.1807665   2.52124767083394e-05 
## [ 99 ]    -12154845.3647919   2.31675067715933e-05 
## [ 100 ]   -12154842.7782316   2.12800804362642e-05 
## [ 101 ]   -12154840.4032904   1.95390567069628e-05 
## [ 102 ]   -12154838.2234458   1.79339661086352e-05 
## [ 103 ]   -12154836.2233698   1.6454981192614e-05 
## [ 104 ]   -12154834.3888541   1.50928893559169e-05 
## [ 105 ]   -12154832.7067376   1.38390759118821e-05 
## [ 106 ]   -12154831.1648391   1.26854783352119e-05 
## [ 107 ]   -12154829.7518925   1.1624569270271e-05 
## [ 108 ]   -12154828.457484    1.06493360450285e-05 
## [ 109 ]   -12154827.2719951   9.75323508339251e-06

Save for decision analysis:

dec_df <- data_frame(gene = rownames(sce_hvg),
                     m = pcavi$m_beta[1,],
                     s = pcavi$s_beta[1,])
# write_csv(dec_df, "../tables/Phenopath/dec_df_new.csv")

Plot the elbo:

plot_elbo(pcavi) +
  xlab(expression("CAVI iterations")) +
  theme_cowplot(font_size = 11)

ggsave("../plots/20181012phenopath/elbo.png", width = 5, height = 3)

Exploring PhenoPath results

Comparison to PC1

xs <- sce_hvg$time
 
qplot(scale_vec(pc1), pcavi$m_z, color = factor(xs)) +
  scale_color_brewer(palette = "Set1", name = "Source") +
  #stat_function(fun = function(x) x, color = 'black') + 
  xlab("PC1") + ylab(expression(z))

qplot(scale_vec(pc1), pcavi$m_z, color = factor(sce$Sample.Name)) +
  scale_color_brewer(palette = "Set1", name = "Source") +
  #stat_function(fun = function(x) x, color = 'black') + 
  xlab("PC1") + ylab(expression(z))

Identifying significant genes

df_beta <- data_frame(
  lambda = pcavi$m_lambda,
  beta = pcavi$m_beta[1,],
  chi = pcavi$chi_exp[1,],
  alpha = pcavi$m_alpha[1,],
  pos_sd = sqrt(pcavi$s_beta[1,]),
  gene = rowData(sce_hvg)$Geneid, #stringr::str_to_title(rownames(sce_hvg)),
  # ensembl_gene_id = rowData(sce_hvg)$ensembl_gene_id,
  is_sig = significant_interactions(pcavi, n = 3) #[,1]
)


# write_csv(df_beta, "../tables/Phenopath/df_gene_shalek.csv")


df_beta$is_sig_graph <- 
  plyr::mapvalues(df_beta$is_sig, from = c(FALSE, TRUE),
                  to = c("Non-significant", "Significant"))

This gives:

sum(df_beta$is_sig)
## [1] 1055

significant interactions.

We can plot these:

chi_cutoff <- 0.15
df_beta <- dplyr::mutate(df_beta, 
                  gene_symbol = gene)


ggplot(df_beta, aes(x = beta, y = 1 / chi, color = is_sig_graph)) + 
  geom_point(alpha = 0.8) +
  theme(legend.title = element_blank()) +
  # geom_point(data = dplyr::filter(df_beta, is_mmr), alpha = 0.6) +
  scale_color_brewer(palette = "Set1", name = "'Significant'") +
     geom_label_repel(data = dplyr::filter(df_beta, 1 / chi > chi_cutoff), aes(label = gene_symbol)) +
  xlab(expression(beta)) +
  ylab(expression(paste("[", chi, "]"^-1)))

Which genes are sig?

dplyr::filter(df_beta, is_sig) %>% dplyr::arrange(desc(abs(beta))) %>% 
  dplyr::select(gene, beta, alpha) %>% 
  datatable()

We can plot expression along phenotime for the 35 significant genes:

tmap <- pcavi$m_z

top_genes <- dplyr::filter(df_beta, is_sig) %>% 
  dplyr::arrange(desc(abs(beta))) %>% 
  extract2("gene") %>% head(n=20)

Check z loosely corresponds to time:

zdf <- data_frame(z = pcavi$m_z, time = as.factor(sce_hvg$time), stimulant = xs, patient = sce_hvg$Patient)

ggplot(zdf, aes(x = time, y = z, fill = time)) + 
  geom_violin(alpha = 0.8) +
  theme(legend.position = "none") + 
  scale_fill_brewer(palette = "Set3") +
  # scale_fill_brewer(palette = "Set1") +
  xlab("Capture time") +
  ylab("Pseudotime)") +
     facet_grid(~patient, scales = "free")

  theme(axis.text = element_text(size = 8),
        axis.title = element_text(size = 9),
        legend.title = element_text(size = 11),
        legend.text = element_text(size = 10)) 
## List of 4
##  $ axis.title  :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : num 9
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text   :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : num 8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : num 10
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : num 11
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE
# ggsave(filename = "../plots/20181012phenopath/violin_pseudotimeVSpatient.png",width = 8, height = 2 )
  
time_plot <- last_plot()
zdf <- data_frame(z = pcavi$m_z, time = as.factor(sce_hvg$time), stimulant = xs, patient = sce_hvg$Patient)

ggplot(zdf, aes(x = time, y = z, fill = time)) + 
  geom_violin(alpha = 0.8) +
  theme(legend.position = "none") + 
  scale_fill_brewer(palette = "Set3") +
  # scale_fill_brewer(palette = "Set1") +
  xlab("Capture time (day)") +
  ylab("Pseudotime") +
     # facet_grid(~patient, scales = "free")
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 15),
        legend.title = element_text(size = 15),
        legend.text = element_text(size = 15)) 

# ggsave(filename = "../plots/20181012phenopath/violin_pseudotime.png",width = 3, height = 3)
# ggsave(filename = "../manuscript_plots/PhenoPath/violin_pseudotime.png",width = 3, height = 3)
  

# time_plot <- last_plot()
select <- dplyr::select
sce_hvg <- runPCA(sce_hvg, ncomponents = 3, return_SCE = TRUE)

tmap <- pcavi$m_z
sce_hvg$tmap <- tmap

df_pca <- as_data_frame(reducedDim(sce_hvg)[,c(1,2)]) %>% 
  mutate(pseudotime = sce_hvg$tmap, Stimulant = sce_hvg$source)

df_no_stim <- select(df_pca, -Stimulant)

df_pca$Stimulant <- factor(df_pca$Stimulant, levels = c("Fresh","O.N.","Long"))

plt <- ggplot(df_pca, aes(x = PC1, y = PC2)) +
  geom_point(data = df_no_stim, fill = "grey80", color = "grey80", size = 3) +
  geom_point(aes(fill = pseudotime), shape = 21, color = 'grey20', size = 3) +
  scale_fill_viridis(name = "Pseudotime", option = "C") + 
  facet_wrap(~ Stimulant) +
  theme(legend.position = c(.4,.08),
        legend.direction = "horizontal") +
  theme(strip.background = element_rect(fill = "grey95", color = "grey30", size = 1),
        legend.text = element_text(size = 8),
        axis.text = element_text(size = 9),
        axis.title = element_text(size = 10),
        legend.title = element_text(size = 10)) +
  xlim(-18, 20) + ylim(-15, 10)

plt
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave("../plots/20181012phenopath/PC1PC2.png", width = 15,height = 5)
tiff
## function (filename = "Rplot%03d.tiff", width = 480, height = 480, 
##     units = "px", pointsize = 12, compression = c("none", "rle", 
##         "lzw", "jpeg", "zip", "lzw+p", "zip+p"), bg = "white", 
##     res = NA, ..., type = c("cairo", "Xlib", "quartz"), antialias) 
## {
##     if (!checkIntFormat(filename)) 
##         stop("invalid 'filename'")
##     g <- .geometry(width, height, units, res)
##     new <- list(...)
##     type <- if (!missing(type)) 
##         match.arg(type)
##     else getOption("bitmapType")
##     if (!missing(antialias)) 
##         new$antialias <- match.arg(antialias, aa.cairo)
##     d <- check.options(new, name.opt = ".X11.Options", envir = .X11env)
##     antialias <- match(d$antialias, aa.cairo)
##     comp <- switch(match.arg(compression), none = 1L, rle = 2L, 
##         lzw = 5L, jpeg = 7L, zip = 8L, `lzw+p` = 15L, `zip+p` = 18L)
##     if (type == "quartz" && capabilities("aqua")) {
##         width <- g$width/ifelse(is.na(res), 72, res)
##         height <- g$height/ifelse(is.na(res), 72, res)
##         invisible(.External(C_Quartz, "tiff", path.expand(filename), 
##             width, height, pointsize, d$family, d$antialias != 
##                 "none", "", bg, "white", if (is.na(res)) NULL else res))
##     }
##     else if (type == "cairo" && capabilities("cairo")) 
##         invisible(.External(C_devCairo, filename, 8L, g$width, 
##             g$height, pointsize, bg, res, antialias, comp, d$family, 
##             300))
##     else invisible(.External2(C_X11, paste0("tiff::", comp, ":", 
##         filename), g$width, g$height, pointsize, d$gamma, d$colortype, 
##         d$maxcubesize, bg, bg, d$fonts, res, 0L, 0L, "", 0, 0, 
##         d$family))
## }
## <bytecode: 0x7fb0be5a58f8>
## <environment: namespace:grDevices>
library(grid)
genes <- c("FOS", "JUN", "CCND1")
mm <- match(genes, rowData(sce_hvg)$mgi_symbol)
gexp <- t(exprs(sce_hvg)[mm, ])
colnames(gexp) <- genes
gexp_df <- as_data_frame(gexp) %>% 
  mutate(pseudotime = sce_hvg$tmap)

df_bar <- inner_join(df_pca, gexp_df)
## Joining, by = "pseudotime"
classify_cell <- function(row) {
  if(row['pseudotime'] < 0.2) return("beginning")
  if(row['pseudotime'] > 0.2 && row['Stimulant'] == "O.N.") return("Overnight-end")
  if(row['pseudotime'] > 0.2 && row['Stimulant'] == "Long") return("Long-end")
  return(NA)
}

df_bar$cell_class <- sapply(seq_len(nrow(df_bar)), function(i) classify_cell(df_bar[i, ]))

df_group <- filter(df_bar, !is.na(cell_class)) %>% 
  gather(gene, expression, -(PC1:Stimulant), -cell_class) %>% 
  group_by(cell_class, gene) %>% 
  summarise(mean_expression = mean(expression))
df_group$gene <- factor(df_group$gene, levels = genes)

plts <- list()
for(cl in unique(df_group$cell_class)) {
  plts[[cl]] <- filter(df_group, cell_class == cl) %>% 
  ggplot(aes(x = gene, y = mean_expression, fill = gene)) +
    geom_bar(stat = "identity", color = 'grey20') +
    scale_fill_brewer(palette = "Dark2") +
    theme(legend.position = "None",
          axis.title.x = element_blank(),
          axis.title.y = element_text(size = 8, face = "bold"),
          axis.text.y = element_text(size = 8),
          axis.text.x = element_text(size = 8, angle = -90, face = "bold")) +
    labs(y = "Mean z-score\n expression") +
    ylim(-6, 6)
}

width = 0.12
height = 0.3

pca_plot <- ggdraw() + 
  draw_plot(plt) +
  draw_plot(plts[["beginning"]], x = 0.41, y = 0.2, width = width, height = height) +
  draw_plot(plts[["beginning"]], x = 0.88, y = 0.15, width = width, height = height) +
  draw_plot(plts[["Overnight-end"]], x = 0.06, y = 0.105, width = width, height = height) +
  draw_plot(plts[["Long-end"]], x = 0.56, y = 0.61, width = width, height = height)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).
pca_plot

make_gene_plot <- function(genes) {
  inds <- match(genes, rowData(sce_hvg)$Geneid)
  
  df <- t(exprs(sce_hvg))[, inds, drop=FALSE] 
  colnames(df) <- genes #rownames(sce_hvg)[inds]
  
  df <- as_data_frame(df) %>% 
    dplyr::mutate(phenopath = pcavi$m_z, x = factor(sce_hvg$source,levels = c("Fresh","O.N.","Long"))) %>% 
    gather(gene, expression, -phenopath, -x)
  
  df$gene <- factor(df$gene, levels = genes)
  
  df_nox <- dplyr::select(df, -x)
  
  ggplot(df, aes(x = phenopath, y = expression, fill = x, color = x)) + 
    geom_point(data = df_nox, color = "grey80", fill = "grey80") +
    geom_point(shape = 21, color = 'grey30', alpha = 0.3) +
    facet_grid(gene ~ x, scales = "free") + 
    scale_fill_brewer(palette = "Set1", name = "MSI") +
    scale_color_brewer(palette = "Set1", name = "MSI") +
    theme(legend.position = "none", 
          strip.text = element_text(face = "bold"),
          strip.text.x = element_text(size = 10, margin = margin(.1, 0, .1, 0, "cm")),
          strip.text.y = element_text(size = 8, margin = margin(0, .1, 0, .1, "cm")),
          strip.background = element_rect(colour = "black", fill = "grey95", 
                                          size = 0.3, linetype = 1)) +
  theme(axis.text = element_text(size = 9),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 10)) +
    stat_smooth(se = FALSE, method = "lm", size = 1.5, formula = y ~ ns(x,3),
                color = 'grey30') +
    ylab(expression("Normalised expression")) + 
    xlab("Pseudotime")
}

genes <- c("ESR1","LGR5")

make_gene_plot(genes)

# ggsave("../manuscript_plots/PhenoPath/ESR1_LGR5.png", height = 3, width = 8)
# gene_plot <- plot_over_pseudotime <- last_plot()

Now do the same for original time series:

inds <- match(genes, rowData(sce_hvg)$mgi_symbol)

df <- t(exprs(sce_hvg))[, inds, drop=FALSE] 
colnames(df) <- genes
  
df <- as_data_frame(df) %>% 
    dplyr::mutate(capture_time = sce_hvg$time, x = as.character(xs)) %>% 
    gather(gene, expression, -capture_time, -x)
  
df$gene <- factor(df$gene, levels = genes)

df_g <- group_by(df, capture_time, x, gene) %>% 
  summarise(median_expression = median(expression))

ggplot(df, aes(x = capture_time, y = expression, fill = x, color = x)) + 
    # geom_beeswarm(shape = 21, color = 'grey30', alpha = 0.3) +
    geom_jitter(shape = 21, color = 'grey30', alpha = 0.3, width= 0.1) +
    facet_grid(gene ~ x, scales = "free") + 
    scale_fill_brewer(palette = "Set1") +
    scale_color_brewer(palette = "Set1") +
    theme(legend.position = "none", 
          strip.text = element_text(face = "bold"),
          strip.text.x = element_text(size = 10, margin = margin(.1, 0, .1, 0, "cm")),
          strip.text.y = element_text(size = 8, margin = margin(0, .1, 0, .1, "cm")),
          strip.background = element_rect(colour = "black", fill = "grey95", 
                                          size = 0.3, linetype = 1)) +
  theme(axis.text = element_text(size = 9),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 10)) +
    ylab(expression("Normalised expression")) + 
    xlab("Capture time") +
      geom_point(data = df_g, aes(y = median_expression), 
                 shape = 21, color = 'black', size = 2) +
  geom_line(data = df_g, aes(y = median_expression, group = 1))
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

plot_over_capture_time <- last_plot()

# ggsave("../../figs/supplementary_time_shalek.png", width = 6, height = 4)

Differential expression comparison

We’ll use limma-voom for standard differential expression:

dge <- DGEList(counts(sce_hvg))
dge <- calcNormFactors(dge)

design <- model.matrix(~ x, colData(sce_hvg))
v <- voom(dge, design, plot = TRUE)

fit <- lmFit(v, design)
fit <- eBayes(fit)
results <- decideTests(fit)


df_limma <- data_frame(coef = fit$coefficients[,2], 
                       pval = fit$p.value[,2],
                       mu = pcavi$m_mu,
                       gene_symbol = rowData(sce_hvg)$Geneid) %>% 
  left_join(df_beta, by = "gene_symbol") %>%
  dplyr::mutate(qval = p.adjust(pval, method = "BH"),
                log10qval = -log10(qval))
cols <- RColorBrewer::brewer.pal(3, "Set2")
cols2 <- c("#c5e2d9", cols[2])

ggplot(df_limma, aes(x = beta, y = log10qval, color = is_sig_graph)) + 
  geom_point() +
  ylab(expression(paste("Limma Voom -", log[10], "(q-value)"))) + 
  xlab(expression(paste("PhenoPath ", beta))) +
  geom_hline(yintercept = -log10(0.05), linetype = 2, alpha = 0.5) +
  theme(axis.text = element_text(size = 8),
        axis.title = element_text(size = 9),
        axis.title.y = element_text(size = 8)) +
  theme(legend.title = element_text(size = 9),
        legend.text = element_text(size = 8)) +
  scale_color_manual(values = cols2, name = "Interaction") +
  theme(legend.margin = margin(), legend.position = "none")

de_plot <- ggExtra::ggMarginal(last_plot(), margins = "y", type = "histogram", size = 10)

Again no obvious relationship.

Crossover bit for chris

df_beta <- mutate(df_beta,
                  crossover = - alpha / beta)

filter(df_beta, is_sig) %>% 
  ggplot(aes(x = crossover)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# write_csv(df_beta, "../tables/Phenopath/shalek_interactions.csv")
textinfo <- frame_data(
  ~x, ~y, ~label,
  1.2, 0.2, "Gene upregulated along pseudotime\nLPS increases upregulation",
  1.2, -0.2, "Gene upregulated along pseudotime\nPAM increases upregulation",
  -1.3, 0.2, "Gene downregulated along pseudotime\nPAM increases downregulation",
  -1.3, -0.2, "Gene downregulated along pseudotime\nLPS increases downregulation"
)

cols <- RColorBrewer::brewer.pal(3, "Set2")
cols2 <- c("#c5e2d9", cols[2])

outline_cols = c("#c5e2d9", 'black')

ggplot(df_beta, aes(x = lambda, y = beta)) +
  geom_point(shape = 21, aes(fill = is_sig_graph, color = is_sig_graph)) +
  geom_vline(xintercept = 0, linetype = 2, alpha = 0.5) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  scale_fill_manual(values = cols2, name = "Interaction") +
  scale_color_manual(values = outline_cols, name = "Interaction") +
  geom_text_repel(data = dplyr::filter(df_beta, (beta > 1 & is_sig) | beta < -1),
                aes(label = gene), color = 'black',
                size = 3) +
  ylab("Covariate-pseudotime interaction") +
  xlab("Gene regulation over pseudotime") +
  theme(legend.position = 'bottom',  #, legend.margin = margin(),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 11),
        legend.title = element_text(size = 11),
        legend.text = element_text(size = 10)) 

  # geom_text(data = textinfo, aes(x = x, y = y, label = label), 
            # color = 'black', size = 3, fontface = "bold")
lplot <- ggExtra::ggMarginal(last_plot(), margins = "y", type = "histogram", size = 10)
# 
# ggsave("../manuscript_plots/")
print(lplot)

GO analysis

genome <- "hg19"
# supportedGeneIDs()
id <- "geneSymbol"
# source("https://bioconductor.org/biocLite.R")
# biocLite("org.Hs.eg.db")

all_genes <- rowData(sce_hvg)$Geneid

upreg_genes <- filter(df_beta, is_sig, beta > 0) %>% extract2("gene_symbol") 
downreg_genes <- filter(df_beta, is_sig, beta < 0) %>% extract2("gene_symbol")

upg <- 1 * (all_genes %in% upreg_genes)
downg <- 1 * (all_genes %in% downreg_genes)

names(upg) <- names(downg) <- all_genes

pwfup <- nullp(upg, genome = "hg19", id = "geneSymbol")
## Loading hg19 length data...

goup <- goseq(pwfup, genome = "hg19", id = "geneSymbol", test.cats = "GO:BP")
## Fetching GO annotations...
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked _by_ '.GlobalEnv':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## For 479 genes, we could not find any categories. These genes will be excluded.
## To force their use, please run with use_genes_without_cat=TRUE (see documentation).
## This was the default behavior for version 1.15.1 and earlier.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
pwfdown <- nullp(downg, genome = "hg19", id = "geneSymbol")
## Loading hg19 length data...
## Warning in pcls(G): initial point very close to some inequality constraints

godown <- goseq(pwfdown, genome = "hg19", id = "geneSymbol", test.cats = "GO:BP")
## Fetching GO annotations...
## For 479 genes, we could not find any categories. These genes will be excluded.
## To force their use, please run with use_genes_without_cat=TRUE (see documentation).
## This was the default behavior for version 1.15.1 and earlier.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

Graph results:

parse_go <- function(go, type, n_tested) {
  go <- go %>% 
    mutate(qval = p.adjust(over_represented_pvalue)) %>% 
    mutate(log10qval = -log10(qval),
           prop_in_cat = numInCat / n_tested) %>% 
    head(n = 6) %>% 
    mutate(type = type) %>% 
    tbl_df() %>% 
    arrange(desc(log10qval))
  go
}


gos <- bind_rows(
  parse_go(goup, "Up-regulated", length(upreg_genes)),
  parse_go(godown, "Down-regulated", length(downreg_genes))
)

# gos <- filter(gos, type == "LPS up-regulated") 

gos$term <- stringr::str_to_title(gos$term)
gos$term <- factor(gos$term, levels = gos$term[order(gos$log10qval)])

gos <- mutate(gos, prop_cat = numDEInCat / numInCat)


gos %>% 
  filter(qval < 0.05) %>% 
ggplot(aes(x = term, y = log10qval)) + #, size = 100 * prop_cat)) +
  geom_segment(aes(y = min(log10qval - 1), yend = log10qval, x = term, xend = term),
               color = 'grey30', linetype = 3) +
    geom_point(aes(color = type, shape = type)) +
    coord_flip() +
  facet_wrap(~ type, ncol = 1, scales = "free") +
    theme(axis.title.y = element_blank()) +
    ylab(expression(paste("-", log[10], " q-value"))) +
  scale_size(name = "% category\nrepresented") +
    theme(legend.position = "bottom", #c(0.6,0.1),
          legend.direction = "horizontal",
        axis.text = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 10),
        legend.title = element_blank(),
        legend.text = element_text(size = 10),
        legend.margin = margin(t = -.3, l = -4.5, unit = "cm")) +
  scale_color_brewer(palette = "Set1") +
  theme(strip.text = element_text(size = 8, face = "bold"),
          strip.background = element_rect(colour = "black", fill = "grey95", 
                                          size = 0.5, linetype = 1))

# ggsave("../../figs/go.png", width = 5, height = 5)
# goplot <- last_plot()

Technical

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      splines   stats4    parallel  stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.6.0          AnnotationDbi_1.42.1       
##  [3] bindrcpp_0.2.2              ggbeeswarm_0.6.0           
##  [5] phenopath_1.4.0             goseq_1.32.0               
##  [7] geneLenDataBase_1.16.0      BiasedUrn_1.07             
##  [9] RColorBrewer_1.1-2          edgeR_3.22.5               
## [11] limma_3.36.5                DT_0.4                     
## [13] viridis_0.5.1               viridisLite_0.3.0          
## [15] cowplot_0.9.3               ggrepel_0.8.0              
## [17] magrittr_1.5                forcats_0.3.0              
## [19] stringr_1.3.1               dplyr_0.7.6                
## [21] purrr_0.2.5                 readr_1.1.1                
## [23] tidyr_0.8.1                 tibble_1.4.2               
## [25] tidyverse_1.2.1             scater_1.8.4               
## [27] SingleCellExperiment_1.2.0  SummarizedExperiment_1.10.1
## [29] DelayedArray_0.6.6          BiocParallel_1.14.2        
## [31] matrixStats_0.54.0          GenomicRanges_1.32.7       
## [33] GenomeInfoDb_1.16.0         IRanges_2.14.12            
## [35] S4Vectors_0.18.3            ggplot2_3.0.0              
## [37] Biobase_2.40.0              BiocGenerics_0.26.0        
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.3-2         rjson_0.2.20            
##  [3] rprojroot_1.3-2          XVector_0.20.0          
##  [5] rstudioapi_0.8           bit64_0.9-7             
##  [7] lubridate_1.7.4          xml2_1.2.0              
##  [9] tximport_1.8.0           knitr_1.20              
## [11] jsonlite_1.5             Rsamtools_1.32.3        
## [13] broom_0.5.0              GO.db_3.6.0             
## [15] shinydashboard_0.7.0     shiny_1.1.0             
## [17] compiler_3.5.1           httr_1.3.1              
## [19] backports_1.1.2          assertthat_0.2.0        
## [21] Matrix_1.2-14            lazyeval_0.2.1          
## [23] cli_1.0.1                later_0.7.5             
## [25] prettyunits_1.0.2        htmltools_0.3.6         
## [27] tools_3.5.1              gtable_0.2.0            
## [29] glue_1.3.0               GenomeInfoDbData_1.1.0  
## [31] reshape2_1.4.3           Rcpp_0.12.19            
## [33] cellranger_1.1.0         Biostrings_2.48.0       
## [35] nlme_3.1-137             rtracklayer_1.40.6      
## [37] crosstalk_1.0.0          DelayedMatrixStats_1.2.0
## [39] rvest_0.3.2              miniUI_0.1.1.1          
## [41] mime_0.6                 XML_3.98-1.16           
## [43] zlibbioc_1.26.0          scales_1.0.0            
## [45] hms_0.4.2                promises_1.0.1          
## [47] rhdf5_2.24.0             yaml_2.2.0              
## [49] memoise_1.1.0            gridExtra_2.3           
## [51] biomaRt_2.36.1           ggExtra_0.8             
## [53] stringi_1.2.4            RSQLite_2.1.1           
## [55] GenomicFeatures_1.32.3   rlang_0.2.2             
## [57] pkgconfig_2.0.2          bitops_1.0-6            
## [59] evaluate_0.12            lattice_0.20-35         
## [61] Rhdf5lib_1.2.1           bindr_0.1.1             
## [63] labeling_0.3             GenomicAlignments_1.16.0
## [65] htmlwidgets_1.3          bit_1.1-14              
## [67] tidyselect_0.2.5         plyr_1.8.4              
## [69] R6_2.3.0                 DBI_1.0.0               
## [71] mgcv_1.8-24              pillar_1.3.0            
## [73] haven_1.1.2              withr_2.1.2             
## [75] RCurl_1.95-4.11          modelr_0.1.2            
## [77] crayon_1.3.4             rmarkdown_1.10          
## [79] progress_1.2.0           locfit_1.5-9.1          
## [81] readxl_1.1.0             data.table_1.11.8       
## [83] blob_1.1.1               digest_0.6.18           
## [85] xtable_1.8-3             httpuv_1.4.5            
## [87] munsell_0.5.0            beeswarm_0.2.3          
## [89] vipor_0.4.5